#delimit ;

* start from a clean session: no paging of output, no data in memory and
* no stored estimation results ;
set more off ;
clear ;
estimates drop _all ;

* every relative path used below is resolved against this build directory ;
cd "R:/personlig/fenellac/menarche_replication/build" ;

* **************************************************************************** ;
* This code constructs a dataset on eligible women from IHDS 2 (2011-2012) 
* containing regressors/control variables. Eligible women are ever-married women aged 15-49.
*
* Paper: "Age of Marriage and Women's Political Engagement: Evidence from India"
* Authors: Fenella Carpena, Francesca Jensenius
* 
* Code by Fenella Carpena
* Last update: April 28, 2020
* **************************************************************************** ;

****************************************************************************** ;
* get datasets that I will need 
****************************************************************************** ;

* eligible-women file: must be uniquely identified by household and person ;
use "./temp/ihds02-DS03-eligible-women.dta", clear ;
isid IDHH PERSONID ;
save "./temp/temp-women", replace ;

* individual file, used below as the household roster ;
use "./temp/ihds02-DS01-individual.dta", clear ;

	* keep every record that carries at least one person identifier. per the
	* original note this removes the one observation with a missing person ID ;
	keep if !missing(PERSONID) | !missing(IDPERSON) ;

isid IDHH PERSONID ;
save "./temp/temp-hh-roster", replace ;

* household file: must be uniquely identified by household ;
use "./temp/ihds02-DS02-household.dta", clear ;
isid IDHH ;
save "./temp/temp-hh", replace ;

****************************************************************************** ;
* merge all datasets together ;
****************************************************************************** ;

* the eligible-women file is the master dataset for both merges ;
use "./temp/temp-women", clear ;

* step 1: attach the household-roster record of each woman ;
merge 1:1 IDHH PERSONID using "./temp/temp-hh-roster" ;
tabulate _merge ;
* no woman may be left without a roster record (no master-only rows) ;
assert inlist(_merge, 2, 3) ;
* roster members that are not in the eligible-women file are discarded ;
drop if _merge == 2 ;
drop _merge ;

* step 2: attach the household-level variables ;
merge m:1 IDHH using "./temp/temp-hh" ;
tabulate _merge ;
* no woman may be left without a household record (no master-only rows) ;
assert inlist(_merge, 2, 3) ;
* households that contain no eligible woman are discarded ;
drop if _merge == 2 ;
drop _merge ;

****************************************************************************** ;
* eligible woman: age
****************************************************************************** ;

* generate variable ;
gen age = EW6 ;
label var age "Age" ;

* cross-check with HH-roster: eligible women are ever-married 15-49, so these
* women should have: (1) sex = F, (2) marital status that is married (or separated)
* (3) age reported in EW6 should match the HH roster in variable RO5.
* checks (1) and (2) are hard requirements and are enforced with assert, which
* stops the do-file with error r(9) when violated. a bare exit would instead
* end the do-file quietly with return code 0, so a failed check could go
* unnoticed by whoever runs the build ;

	* check that gender indicated in HH roster is female (RO3 == 2).
	* a missing RO3 also fails this check, as it did in the earlier count ;
	* note: I find that all are women as expected ;
	tab RO3 ;
	assert RO3 == 2 ;

	* check that marital status of the individual is married/widowed/divorced,
	* i.e. that nobody is coded as unmarried (RO6 == 2) ;
	* note: all are married/sep/divorced as expected ;
	tab RO6 ;
	assert RO6 != 2 ;

	* check that the age variable in the hh-roster matches the age indicated in the eligible woman survey.
	* this one is informational only: the mismatches are counted, not enforced ;
	* note: only N=33 observations don't match so it seems fine ;
	count if age != RO5 ;
	
****************************************************************************** ;
* eligible woman: marital status
****************************************************************************** ;

* working copy of the roster marital status (dropped at the end of this section) ;
gen marital_status = RO6 ;
label values marital_status RO6 ;
label var marital_status "Marital Status" ;

* generate variables for marital status.
* NOTE(review): a missing RO6 gives 0 on all four indicators, not missing.
* confirm that this is intended ;
gen married = marital_status == 0 | marital_status == 1 ;
gen widowed = marital_status == 3 ;
gen separated = marital_status == 4 ;
gen prev_married = widowed | separated ;

* if there are responses to the questions about discussions with husband at home
* (GR29A, GR29B, GR29C), assume that the person is still married.
* !missing() is used rather than != . because != . is also true for extended
* missing values (.a to .z), which would wrongly be counted as a response.
* the condition is kept in one local so all four replaces use the same rule ;
local has_gr29 "!missing(GR29A) | !missing(GR29B) | !missing(GR29C)" ;

foreach var of varlist widowed separated prev_married { ;
	replace `var' = 0 if `has_gr29' ;
} ;

replace married = 1 if `has_gr29' ;

* label variables ;
label var married "Married" ;
label var widowed "Widow" ;
label var prev_married "Widow/Separated/Divorced" ;
label var separated "Separated/Divorced" ;

drop marital_status ;

****************************************************************************** ;
* eligible woman: ever remarried
****************************************************************************** ;

* ever_remarried is 1 when MH15 == 2, 0 for any other recorded answer and
* missing when MH15 is missing. !missing() is used instead of != . so that
* extended missing values (.a to .z) stay missing rather than being coded 0 ;
gen ever_remarried = MH15 == 2 if !missing(MH15) ;
label var ever_remarried "Ever remarried" ;

****************************************************************************** ;
* eligible woman: marriage age (at current marriage) ;
****************************************************************************** ;

* start from the reported age at (current) marriage ;
generate marriage_age = MH1A ;

* winsorize the left tail at the 1st percentile (effectively replaces ages 0-7 with 8).
* the percentile is stored in a temporary scalar and the distribution is
* displayed before and after the replacement ;
tempname p1_floor ;
summarize marriage_age, detail ;
scalar `p1_floor' = r(p1) ;
replace marriage_age = `p1_floor' if marriage_age < `p1_floor' ;
summarize marriage_age, detail ;

label variable marriage_age "Marriage age, winsorized bottom 1%" ;

****************************************************************************** ;
* eligible woman: age at gauna (current marriage) ;
****************************************************************************** ;

* start from the reported age at gauna ;
generate gauna_age = MH1C ;

* winsorize the left tail at the 1st percentile (effectively replaces ages 1-11 with 12).
* the percentile is stored in a temporary scalar and the distribution is
* displayed before and after the replacement ;
tempname p1_floor ;
summarize gauna_age, detail ;
scalar `p1_floor' = r(p1) ;
replace gauna_age = `p1_floor' if gauna_age < `p1_floor' ;
summarize gauna_age, detail ;

label variable gauna_age "Gauna age, winsorized bottom 1%" ;

****************************************************************************** ;
* eligible woman: age at menarche 
****************************************************************************** ;

* age at menarche, copied unchanged from MH1E (no winsorizing is applied) ;
generate menarche_age = MH1E ;
label variable menarche_age "Menarche age" ;

****************************************************************************** ;
* eligible woman: parents' education ; 
****************************************************************************** ;

* ever-schooled indicators: 1 when EW14A (mother) or EW14B (father) equals 1,
* 0 for any other recorded answer, missing when the source is missing.
* !missing() is used instead of != . so that extended missing values
* (.a to .z) stay missing. with != . they would be coded 0 here and would
* then also force years of education to 0 in the consistency step below ;
gen mother_ever_schooled = EW14A == 1 if !missing(EW14A) ;
gen father_ever_schooled = EW14B == 1 if !missing(EW14B) ;

* years of education, copied unchanged ;
gen mother_yrs_educ = EW15A ;
gen father_yrs_educ = EW15B ;

label var mother_ever_schooled "Respondent's mother ever schooled" ;
label var father_ever_schooled "Respondent's father ever schooled" ;
label var mother_yrs_educ "Mother's years of education" ;
label var father_yrs_educ "Father's years of education" ;

* making "ever schooled" and "years schooling" consistent,
* using "years of schooling" as the "master" information ;

* positive (non-missing) years imply ever schooled ;
replace mother_ever_schooled = 1 if mother_yrs_educ > 0 & mother_yrs_educ < . ;
replace father_ever_schooled = 1 if father_yrs_educ > 0 & father_yrs_educ < . ;

* zero years imply never schooled ;
replace mother_ever_schooled = 0 if mother_yrs_educ == 0 ;
replace father_ever_schooled = 0 if father_yrs_educ == 0 ;

* never schooled implies zero years. this fills in years that are still
* missing for parents reported as never schooled ;
replace mother_yrs_educ = 0 if mother_ever_schooled == 0 ;
replace father_yrs_educ = 0 if father_ever_schooled == 0 ;

* verify that the two pieces of information now agree in both directions ;
assert mother_ever_schooled == 0 if mother_yrs_educ == 0 ;
assert father_ever_schooled == 0 if father_yrs_educ == 0 ;

assert mother_yrs_educ == 0 if mother_ever_schooled == 0 ;
assert father_yrs_educ == 0 if father_ever_schooled == 0 ;
	
****************************************************************************** ;
* eligible woman: height 
****************************************************************************** ;

* two height measurements are available and the first one (AP5) is used ;
generate height_measure = AP5 ;
label variable height_measure "Height (cm)" ;

****************************************************************************** ;
* household: background characteristics 
****************************************************************************** ;

* monthly per-capita expenditure: COPC divided by 12.
* NOTE(review): this presumes COPC is an annual figure. confirm against the codebook ;
generate hh_exppc = COPC / 12 ;
label variable hh_exppc "Household monthly expenditure per capita" ;

* number of persons in the household, copied unchanged from NPERSONS ;
generate hh_size = NPERSONS ;
label variable hh_size "Household size" ;

****************************************************************************** ;
* household: religion
****************************************************************************** ;

* ID11 (religion) is already in memory: the household file was merged in full,
* without keepusing(), in the merge section earlier in this do-file, and only
* matched observations were kept. merging the same file a second time added no
* variables and no values. it only re-added the households without an eligible
* woman and dropped them again, so that merge is omitted here.
* NOTE(review): the row order of the saved file may differ from a run that
* included the extra merge. the contents are the same ;
confirm variable ID11 ;

* religion indicators built directly from ID11: 1 is coded as Hindu and 2 as
* Muslim. any other value, including missing, gives 0 on both ;
gen hindu = ID11 == 1 ;
gen muslim = ID11 == 2 ;

label var hindu "Hindu" ;
label var muslim "Muslim" ;

****************************************************************************** ;
* household: caste ;
****************************************************************************** ;

* caste indicators from ID13. each is 1 for the listed code and 0 otherwise,
* including when ID13 is missing ;
generate obc = (ID13 == 3) ;
generate sc  = (ID13 == 4) ;
generate st  = (ID13 == 5) ;

label variable obc "Other Backward Class" ;
label variable sc "Scheduled Caste" ;
label variable st "Scheduled Tribe" ;

****************************************************************************** ;
* household: urban ;
****************************************************************************** ;

* complementary urban and rural indicators from URBAN2011 (1 = urban, 0 = rural).
* both are missing when URBAN2011 is missing. !missing() is used instead of
* != . so that extended missing values (.a to .z) stay missing rather than
* being coded 0 on both indicators ;
gen urban = URBAN2011 == 1 if !missing(URBAN2011) ;
gen rural = URBAN2011 == 0 if !missing(URBAN2011) ;

label var urban "Urban" ;
label var rural "Rural" ;

****************************************************************************** ;
* save data ;
****************************************************************************** ;
* keep track of the wave: constant 2 for every observation in this file ;
gen ihds_wave = 2 ;
label var ihds_wave "IHDS Wave" ;

* delete all extraneous variables.
* each a-b range is resolved against the variable order in memory at the moment
* the command runs, so these drops depend on the column order of the source
* files and on the sequence of the commands. do not reorder or combine them
* without checking the variable order first ;
drop SURVEY ;
drop GE10A-HI11 ;
drop METRO ; 
drop NEWQ-DEFLATOR ;
drop ID11-NEWQELIGIBLE ;
drop RO7-MGMONTHS1 ;
drop HS3D-WS12HH ;
drop WTEW-INDFWT ;

* store the identifier variables as strings, overwriting the originals ;
tostring STATEID DISTID PSUID HHID IDHH HHSPLITID PERSONID, replace ;

* organize variable list: identifiers first, then the constructed variables
* in the order given below ;
order STATEID-DISTRICT ihds_wave
urban rural
ever_remarried
married widowed separated prev_married 
hh_size hh_exppc 
hindu muslim 
sc st obc
age height_measure marriage_age gauna_age menarche_age 
mother_yrs_educ mother_ever_schooled 
father_yrs_educ father_ever_schooled  ;

* write the final dataset in an older file format via saveold version(12) ;
saveold "./output/eligible-women-ihds02-rhs.dta", replace version(12) ;

* remove the intermediate files created at the top of this do-file ;
erase "./temp/temp-women.dta" ;
erase "./temp/temp-hh.dta" ;
erase "./temp/temp-hh-roster.dta" ;

* end of do-file ;
exit ;
